pacman::p_load(stm,sjPlot,tm,quanteda,tidyr,tidyverse,tidytext,MatchIt,ggrepel,splitstackshape,
               MASS)


#Content of resolutions
# Figure A.3: Length of resolutions and China’s presence


# ---- Load the resolution data and keep the voted resolutions ----
# Reads the serialized dataset from the working directory.
hrc_data_clean <- readRDS("mpu.RData")

# Keep only rows whose vote type is "adopted by vote" (either capitalisation).
# `%in%` is used instead of `==` so rows with a missing vote_type are dropped
# rather than turned into all-NA rows by logical subsetting.
hrc_data_text0 <- hrc_data_clean[hrc_data_clean$vote_type %in%
                                   c("adopted by vote", "Adopted by vote"), ]

# Restrict to the columns used downstream and drop exact duplicate rows.
hrc_data_text0 <- hrc_data_text0[, c("year", "title", "resolution", "china_vote",
                                     "text", "country_target",
                                     "country_target_filt")]
hrc_data_text0 <- unique(hrc_data_text0)

# Sequential document identifier; seq_len() is safe when no rows remain.
hrc_data_text0$document <- seq_len(nrow(hrc_data_text0))

# Working copy that the text-cleaning steps modify.
hrc_data_text <- hrc_data_text0
# ---- Clean the resolution text ----
# Collapse runs of whitespace, then strip EVERY straight double quote and
# apostrophe (str_replace() would only remove the first occurrence per text).
hrc_data_text$text <- str_squish(hrc_data_text$text)
hrc_data_text$text <- str_remove_all(hrc_data_text$text, "[\"']")

# Drop rows without a title.
hrc_data_text <- hrc_data_text[!is.na(hrc_data_text$title), ]

# Cut everything from the "Adopted by a recorded vote" marker onwards.
# This runs before lower-casing because the marker is matched case-sensitively.
hrc_data_text$text <- gsub("Adopted by a recorded vote.*", "", hrc_data_text$text)

hrc_data_text$text <- tolower(str_squish(hrc_data_text$text))

# Boilerplate terms, document symbols and roman numerals to delete.
words_to_remove <- c("human", "rights", "council", "international",
                     "united", "nations", "ahrcwg", "ahrcwgcrp",
                     "ahrcwgtfadd", "ahrcwgg", "ahrcres", "ahrc", "ahrcac",
                     "ahrcadd", "ahrccrp", "ahrccrprev", "ahrcdec", "ahrcl",
                     "ahrcress", "ahrcs", "ahrcsadd", "ahrcsl", "ahrcwgtf",
                     "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix", "x",
                     "xi", "xii", "xiii", "xiv", "xv", "xvi", "xvii", "xviii",
                     "xix", "xx", "resolution", "resolutions", "adopted",
                     "session")

# Match whole words only. Without the \b anchors the alternation also deletes
# substrings inside longer words (e.g. "human" inside "humanitarian", "xi"
# inside "existing") and leaves fragments such as the "s" of "resolutions".
removal_pattern <- paste0("\\b(?:",
                          paste(unique(words_to_remove), collapse = "|"),
                          ")\\b")
hrc_data_text$text <- str_remove_all(hrc_data_text$text, removal_pattern)

# Removing words leaves doubled spaces behind; collapse them again.
hrc_data_text$text <- str_squish(hrc_data_text$text)

# ---- Per-resolution length and China-presence indicator ----
hrc_data_text <- as.data.frame(hrc_data_text)

# Number of word tokens in each cleaned text. Counting runs of word characters
# directly avoids the off-by-one of "separator runs + 1", which overcounts any
# text that ends in punctuation and reports 2 for an empty or missing text.
hrc_data_text$words <- str_count(hrc_data_text$text, "\\w+")

# "No" for resolutions from 2013 and 2020, "Yes" for every other year
# (a missing year stays NA).
# NOTE(review): presumably these are the years China was not on the Council -
# confirm against the data description.
hrc_data_text$china_present <- ifelse(
  hrc_data_text$year == 2013 | hrc_data_text$year == 2020,
  "No",
  "Yes"
)

# ---- Exact matching on title, then weighted length regressions ----
# Match "Yes"/"No" resolutions that share the same title and pull out the
# matched sample together with its matching weights.
m.out <- matchit(china_present ~ title,
                 data = hrc_data_text,
                 method = "exact")
exact.data <- match.data(m.out)
exact.data[["china_vote"]] <- as.factor(exact.data[["china_vote"]])
exact.data[["title"]] <- as.factor(exact.data[["title"]])

# Row selectors for the two country-specific subsamples.
is_country <- exact.data$country_target == 1
is_country_filt <- exact.data$country_target_filt == 1

# Model 1: every matched resolution.
lm_words1 <- lm(words ~ year + china_present + title,
                data = exact.data,
                weights = exact.data$weights)
# Model 2: rows with country_target equal to 1.
lm_words2 <- lm(words ~ year + china_present + title,
                data = exact.data[is_country, ],
                weights = exact.data$weights[is_country])
# Model 3: rows with country_target_filt equal to 1.
lm_words3 <- lm(words ~ year + china_present + title,
                data = exact.data[is_country_filt, ],
                weights = exact.data$weights[is_country_filt])

# The third coefficient (after the intercept and year) is the china_present
# term; relabel it "Yes" in each model.
names(lm_words1[["coefficients"]])[3L] <- "Yes"
names(lm_words2[["coefficients"]])[3L] <- "Yes"
names(lm_words3[["coefficients"]])[3L] <- "Yes"

# ---- Figure A.3: coefficient plot written to Figure_A3.png ----
# Hide every coefficient after the first three (the title terms) plus "year",
# so only the presence term is drawn. Negative indexing returns an empty
# vector when there are no title terms, whereas 4:length(...) would count
# backwards.
title_terms <- names(lm_words1$coefficients)[-(1:3)]

# Build the plot before opening the device, so an error while constructing it
# cannot leave a PNG device open.
figure_a3 <- plot_models(lm_words1, lm_words2, lm_words3,
                         rm.terms = c(title_terms, "year"),
                         spacing = 0.9) +
  ylim(-500, 500) +
  labs(color = "Sample") +
  scale_color_manual(labels = c("All Resolutions", "Country Resolutions",
                                "Country Resolutions (critical)"),
                     values = c("blue", "green4", "red")) +
  ylab("Effect of China's presence in Number of Words of Resolutions") +
  xlab("China's Presence") +
  theme_light() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black")

png("Figure_A3.png", 2700, 1800, res = 350)
# Explicit print(): ggplot objects are only auto-printed at the interactive
# top level, so without it the file is blank when the script is source()d.
print(figure_a3)
dev.off()


### Figure A.4: Difference in content in resolution with China’s presence

# ---- Text preparation and word frequencies for Figure A.4 ----
# Strip tag-like <...> spans, replace non-printing characters with spaces,
# lower-case, and remove digits and surplus whitespace.
hrc_data_text$text <- gsub("<.*?>", "", hrc_data_text$text)
hrc_data_text$text <- str_replace_all(hrc_data_text$text, "[^[:graph:]]", " ")
hrc_data_text$text <- tolower(hrc_data_text$text)
hrc_data_text$text <- removeNumbers(hrc_data_text$text)
hrc_data_text$text <- stripWhitespace(hrc_data_text$text)

# Delete English stop words as whole words (every entry wrapped in \b ... \b).
stopwords_regex <- paste0("\\b",
                          paste(stopwords("en"), collapse = "\\b|\\b"),
                          "\\b")
hrc_data_text$text <- stringr::str_replace_all(hrc_data_text$text,
                                               stopwords_regex, "")

# Build a corpus with the remaining columns as document variables and convert
# it to a tidy table.
hrc_corpus <- corpus(hrc_data_text, text_field = "text")
hrc_tidy2 <- tidy(hrc_corpus)

# One row per token, minus the tidytext stop-word list. The join key is named
# explicitly so the join cannot silently pick up other shared column names.
hrc_words <- hrc_tidy2 %>%
  unnest_tokens(word, text) %>%
  anti_join(stop_words, by = "word")

# Occurrences of each word within each document, and that count as a share of
# ALL counted tokens.
# NOTE(review): no grouping is applied, so `resolution_total` is the
# corpus-wide token total and not a per-resolution total, despite its name.
# Behaviour is kept as-is; confirm whether a per-document denominator was
# intended.
hrc_freq <- hrc_words %>%
  count(china_present, title, document, word, year) %>%
  mutate(resolution_total = sum(n),
         percent = n / resolution_total)

# ---- Exact matching on title and word, then one regression per word ----
m.out <- matchit(china_present ~ title + word, data = hrc_freq,
                 method = "exact")
exact.data <- match.data(m.out)

# Vocabulary to model. `words` was never defined in this script, so the name
# previously resolved to the unrelated `stringr::words` sample dataset.
# NOTE(review): the threshold follows the Figure A.4 title ("Words that appear
# more than 5 times in resolutions"); it is applied to total occurrences in
# the matched sample - confirm this matches the intended definition.
word_totals <- tapply(exact.data$n, exact.data$word, sum)
words <- names(word_totals)[word_totals > 5]

# One result row per word; estimates stay NA where they cannot be identified.
models <- data.frame(word = words,
                     term = rep("China's presence", length(words)),
                     estimate = rep(NA_real_, length(words)),
                     std.error = rep(NA_real_, length(words)),
                     statistic = rep(NA_real_, length(words)),
                     p.value = rep(NA_real_, length(words)),
                     stringsAsFactors = FALSE)

for (i in seq_along(words)) {
  word_rows <- exact.data[exact.data$word == words[i], ]
  model_aux <- lm(percent ~ china_present + year, data = word_rows,
                  weights = word_rows$weights)
  coef_table <- summary(model_aux)$coefficients

  # Look the presence coefficient up by name. summary() drops aliased terms,
  # so a fixed row index could silently return the `year` row instead.
  presence_row <- grep("^china_present", rownames(coef_table))
  if (length(presence_row) == 1L) {
    models$estimate[i] <- coef_table[presence_row, 1]
    models$std.error[i] <- coef_table[presence_row, 2]
    models$statistic[i] <- coef_table[presence_row, 3]
    models$p.value[i] <- coef_table[presence_row, 4]
  }

  # Progress report every 100 words and at the end.
  if (i %% 100 == 0 || i == length(words)) {
    message(round((i / length(words)) * 100, 2), "% of words fitted")
  }
}


# ---- Figure A.4: estimate vs p-value scatter written to Figure_A4.png ----
# Build the plot before opening the device, so an error while constructing it
# cannot leave a PNG device open.
figure_a4 <- models %>%
  ggplot(aes(estimate, p.value)) +
  geom_point() +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.001,
                                                    decimal.mark = ".")) +
  # Label points with their word; overlapping labels are skipped.
  geom_text(aes(label = word), vjust = 1, hjust = 1,
            check_overlap = TRUE) +
  xlab("Estimated change because of China's presence") +
  ylab("p-value") +
  # Estimates outside this window are not drawn.
  xlim(-0.00005, 0.00005) +
  ggtitle("Words that appear more than 5 times in resolutions") +
  # Reference line at p = 0.05.
  geom_hline(yintercept = 0.05, linetype = "dashed", color = "red") +
  theme_light()

png("Figure_A4.png", 3000, 2100, res = 350)
# Explicit print(): ggplot objects are only auto-printed at the interactive
# top level, so without it the file is blank when the script is source()d.
print(figure_a4)
dev.off()